In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Load the dataset from the CSV file that sits next to this notebook.
data = pd.read_csv(filepath_or_buffer="data.csv")

# Plain-text preview of the first five rows.
print(data.head(n=5))
   YearsExperience   Salary
0              1.1  39343.0
1              1.3  46205.0
2              1.5  37731.0
3              2.0  43525.0
4              2.2  39891.0
In [2]:
# Count the missing values in each column before modelling.
print(data.isna().sum())
YearsExperience    0
Salary             0
dtype: int64
In [3]:
# Plot Salary (the quantity being predicted) against YearsExperience (the
# predictor) so that the OLS trendline runs in the same direction as the
# LinearRegression fitted further down, which regresses Salary on
# YearsExperience. Marker size still scales with YearsExperience.
figure = px.scatter(
    data_frame=data,
    x="YearsExperience",
    y="Salary",
    size="YearsExperience",
    trendline="ols",
    title="Salary vs. years of experience",
)
figure.show()
In [5]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Feature and target are each selected with a one-column list, so both are
# kept as 2-D column arrays rather than flat vectors.
x = data[["YearsExperience"]].to_numpy()
y = data[["Salary"]].to_numpy()

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable between runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.2, random_state=42
)
In [6]:
# Create the estimator and fit it on the training split in one step;
# fit() returns the estimator itself, so `model` is the fitted regressor.
model = LinearRegression().fit(xtrain, ytrain)

# Leave the fitted estimator as the cell's last expression so it is displayed.
model
Out[6]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [13]:
# Ask for a number of years of experience; float() raises ValueError if the
# text entered is not numeric.
a = float(input("Years of Experience : "))

# predict() expects a 2-D array of shape (n_samples, n_features), so the
# single value is wrapped as one row with one column.
features = np.array([[a]])

# Flatten the prediction before taking its first element so that a plain
# number is printed instead of a nested array such as [[39457.3]]; ravel()
# handles both 1-D and 2-D prediction outputs.
predicted_salary = float(np.ravel(model.predict(features))[0])
print("Predicted Salary = ", round(predicted_salary, 2))
Years of Experience : 1.5
Predicted Salary =  [[39457.30599632]]
In [ ]: